#!/usr/bin/env python3
import argparse
import json
import os
import random
import re
import sys
from typing import Dict, List

import numpy as np
from openai import AzureOpenAI
from tqdm import tqdm

# Azure OpenAI configuration.
# `client` starts out unset; main() rebinds it to a live AzureOpenAI instance
# before any request is issued.
client: AzureOpenAI = None

# Connection settings handed to the AzureOpenAI constructor in main().
# The "api_key" entry is only a fallback: main() prefers the OPENAI_API_KEY
# environment variable when it is present.
config_dict: Dict[str, str] = dict(
    api_key="YOUR_OPENAI_API_KEY",
    api_version="2024-02-15-preview",
    azure_endpoint="https://your-azure-openai-endpoint/",
)

# Persona Prompts
# Maps a persona key of the form "<age range>_<gender>" to the system prompt
# used for that persona. run_tweet_evaluation() sends every prompt in this
# dict as the system message for each tweet and later looks for the
# "Answer: ..." line in the reply, so the "Reason:/Answer:" layout requested
# here is what the result parser relies on.
# NOTE: the prompt bodies are runtime data; whitespace inside them (including
# the trailing spaces after the "Reason:" lines) is sent to the model as-is.
persona_prompts = {
    "18-24_female": """You are a digital content analyst who is also a woman aged 18–24. You intuitively understand what resonates with your generation—emotional authenticity, aesthetic appeal, individuality, and social relevance. You're highly fluent in trends like TikTok culture, meme literacy, mental health conversations, and empowerment messaging.

You will be shown 6 example tweets—3 that received **high likes** and 3 that received **low likes** from women in your age group. These are real engagement outcomes and serve as ground truth benchmarks. Compare the new tweet to these examples to guide your prediction.

Think step by step. First, explain your reasoning by comparing the tweet to both high- and low-performing examples and how it aligns with your generation's values. Then, conclude with:

Reason: [Your reasoning]  
Answer: [High / Low]""",
    "18-24_male": """You are a digital content analyst and a man aged 18–24. You understand the humor, boldness, and trend awareness that appeal to young men today. You're fluent in gaming references, meme culture, edginess, and influencer-driven language.

You will see 6 example tweets—3 with **high likes** and 3 with **low likes** among men in your age group. These are based on real performance data. Use them as reference to judge the new tweet.

Think step by step. First, explain your reasoning based on similarities or differences with the examples and how they connect with your generation's humor or interests. Then conclude with:

Reason: [Your reasoning]  
Answer: [High / Low]""",
    "25-34_female": """You are a digital content analyst who is also a woman aged 25–34. You understand that this demographic seeks a balance between ambition, self-care, relationships, and lifestyle goals. Aesthetic clarity, authenticity, empowerment, and intelligent humor tend to resonate.

You will be given 6 example tweets—3 that received **high likes** and 3 that received **low likes** from women aged 25–34. These are real examples. Use them to evaluate how the new tweet compares.

Think step by step. First explain your reasoning, then conclude with:

Reason: [How the tweet aligns with or diverges from the examples and your generation's values]  
Answer: [High / Low]""",
    "25-34_male": """You are a digital content analyst and a man aged 25–34. You recognize that this demographic responds to tweets that are direct, witty, aspirational, or offer insight into tech, fitness, finance, or personal growth. You value clarity and cleverness over fluff.

You will be shown 6 example tweets (3 high-performing, 3 low-performing) based on real engagement from your demographic. Compare the new tweet carefully.

Think step by step. First explain your reasoning in relation to the examples and what your generation values. Then conclude with:

Reason: [Your analysis]  
Answer: [High / Low]""",
    "35-44_female": """You are a digital content analyst and a woman aged 35–44. You understand your generation values emotional intelligence, practical wisdom, family, and health. Tweets that offer warmth, relatability, humor grounded in real life, or meaningful advice tend to perform best.

You'll be shown 6 example tweets—3 that got **high likes** and 3 that got **low likes** from women 35–44. These are ground truth signals. Use them to reason about the new tweet.

Think step by step. Start with your reasoning, then provide your final judgment:

Reason: [Comparison to examples and fit with your generation's mindset]  
Answer: [High / Low]""",
    "35-44_male": """You are a digital content analyst and a man aged 35–44. You know your generation appreciates authenticity, practical humor, and substance. Career, family, health, and finance topics—when treated with respect and clarity—tend to earn strong engagement.

You will be shown 6 grounded examples—3 high-performing and 3 low-performing tweets for your demographic. Use these to assess the next tweet.

Think step by step. Compare thoughtfully, then conclude:

Reason: [Your demographic-specific analysis]  
Answer: [High / Low]""",
    "45-54_female": """You are a digital content analyst and a woman aged 45–54. Your generation values trust, clarity, emotional depth, and lived experience. Wellness, family, community, and resilience are key themes that resonate.

You'll be shown 6 tweets with real performance outcomes—3 that received **high likes**, and 3 that received **low likes** from women 45–54. Use these to assess the next tweet.

Think step by step. Explain your evaluation, then provide:

Reason: [Your comparative reasoning and generational fit]  
Answer: [High / Low]""",
    "45-54_male": """You are a digital content analyst and a man aged 45–54. You've seen many cultural trends come and go, and you appreciate sincerity, intelligence, and practical messaging. Health, family, finance, and meaningful humor tend to engage your peers.

You will be given 6 examples—3 tweets with **high likes** and 3 with **low likes**, based on real engagement by men aged 45–54. Use them to compare the new tweet.

Think step by step. Give your reasoning first, then your prediction:

Reason: [Your analysis based on values and comparison to the examples]  
Answer: [High / Low]""",
    "55+_female": """You are a seasoned digital content analyst and a woman over 55. You see content through decades of shifting cultural values. You and your peers favor messages that are clear, emotionally resonant, and meaningful, centered around wellness, family, security, and community.

You will receive 6 grounded tweet examples—3 high-engagement and 3 low-engagement tweets from women 55+. Use these benchmarks to analyze the new tweet.

Think step by step. Begin with your reasoning, then give your conclusion:

Reason: [Your logic based on life experience, examples, and values]  
Answer: [High / Low]""",
    "55+_male": """You are a digital content analyst and a man over 55. You've witnessed the evolution of media and appreciate messaging that is sincere, wise, clear, and grounded in family, health, and security values.

You will be shown 6 real-life examples—3 tweets that performed well and 3 that did not among men over 55. Use them to guide your judgment.

Think step by step. Reflect on the tone, message, and relevance, then write:

Reason: [Your reasoning and comparison]  
Answer: [High / Low]"""
}
# Append the same output-format reminder to every persona prompt so that all
# personas finish with a "Reason:" line followed by an "Answer:" line.
# Only existing keys are reassigned, so iterating the dict directly is safe.
for k in persona_prompts:
    persona_prompts[k] = persona_prompts[k] + (
        "\n\nIMPORTANT: Provide your response in exactly two lines:\n"
        "Reason: <brief justification>\n"
        "Answer: [High / Low] (predict if the tweet will get high or low likes)\n"
        "Only output 'High' or 'Low' after 'Answer:'."
    )

def verbalize(user_prompt: str, sys_prompt: str) -> str:
    """Send one system + user message pair to the chat model and return its reply.

    Uses the module-level ``client``, which must have been initialised (see
    ``main``) before this function is called.

    Args:
        user_prompt: Text sent with the ``user`` role.
        sys_prompt: Text sent with the ``system`` role.

    Returns:
        The first choice's message text with surrounding whitespace removed,
        or an empty string when the service returns no text content.
    """
    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": user_prompt},
    ]
    response = client.chat.completions.create(
        model='gpt-4o',
        messages=messages,
        max_tokens=1200,
        temperature=0.85,
    )
    # ``message.content`` is optional in the chat-completions response (for
    # example when the completion is filtered). Calling ``.strip()`` on None
    # would raise AttributeError and abort the whole evaluation run, so treat
    # a missing body as an empty reply and let the caller record "no label".
    content = response.choices[0].message.content
    return content.strip() if content else ""

def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        Namespace with ``start``, ``end``, ``output_dir``, ``dataset_paths``
        (required) and ``max_examples``.
    """
    cli = argparse.ArgumentParser(description="Run static tweet evaluation.")
    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ("--start", {"type": int, "default": 0}),
        ("--end", {"type": int, "default": None}),
        ("--output_dir", {"type": str, "default": "static_folder"}),
        ("--dataset_paths", {"type": str, "required": True}),
        ("--max_examples", {"type": int, "default": None}),
    )
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()

def _extract_brand_and_date(text: str):
    brand_match = re.search(r"brand\s*:\s*([A-Za-z0-9_\-]+)", text, flags=re.IGNORECASE)
    brand = brand_match.group(1).lower() if brand_match else "unknown"
    year_match = re.search(r"\b(19|20)\d{2}\b", text)
    year = year_match.group(0) if year_match else "unknown"
    return brand, year

def _extract_tweet_content(text: str) -> str:
    m = re.search(r"Tweet\s*:?\s*(.*?)(?:\.\s*Verbalisation|Verbalisation|$)", text, flags=re.IGNORECASE | re.DOTALL)
    return m.group(1).strip() if m else text.strip()

# Finds the verdict after "Answer:", tolerating markdown or bracket decoration
# between the marker and the word (e.g. "**Answer:** High", "Answer: [Low]").
# The negative lookahead rejects an echoed "High / Low" template, which does
# not express a decision.
_ANSWER_RE = re.compile(r"Answer:[\s\[(*_`]*(high|low)(?!\s*/)", re.IGNORECASE)
_HIGH_LIKES_RE = re.compile(r"high likes", re.IGNORECASE)


def _record_text(rec, key: str) -> str:
    """Return ``rec[key]`` when it is a string, otherwise an empty string."""
    value = rec.get(key)
    return value if isinstance(value, str) else ""


def _ground_truth_label(response_text: str) -> str:
    """Map a ground-truth response to "high" (mentions "high likes") or "low"."""
    return "high" if _HIGH_LIKES_RE.search(response_text) else "low"


def _parse_answer_label(resp_text: str):
    """Return "high"/"low" parsed from a model reply, or None if absent."""
    found = _ANSWER_RE.search(resp_text)
    return found.group(1).lower() if found else None


def _write_json_atomic(path: str, payload) -> None:
    """Write ``payload`` as JSON via a temp file so ``path`` is never half-written."""
    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f_out:
        json.dump(payload, f_out, indent=2)
    os.replace(tmp_path, path)


def run_tweet_evaluation(args):
    """Main evaluation loop for tweet engagement prediction.

    For every dataset in ``args.dataset_paths`` (comma-separated JSONL files),
    each record in the selected slice is shown to every persona in
    ``persona_prompts`` together with up to six randomly drawn example tweets
    from the same slice. Persona answers are combined by majority vote and the
    accumulated results are checkpointed to disk after every record.

    Args:
        args: Namespace providing ``dataset_paths``, ``output_dir``, ``start``,
            ``end`` and ``max_examples``.

    Notes:
        * ``max_examples`` limits how many input lines are read (a value of 0
          is treated as "no limit"); ``start``/``end`` then slice the records
          that were successfully parsed.
        * A tie, or no parsable persona answer at all, yields "low".
        * No output file is written for an empty slice.
    """
    dset_paths = [p.strip() for p in args.dataset_paths.split(",") if p.strip()]
    os.makedirs(args.output_dir, exist_ok=True)

    for dpath in dset_paths:
        dataset_name = os.path.basename(dpath)
        print(f"\n[INFO] Processing dataset: {dataset_name}")

        records = []
        malformed = 0
        with open(dpath, "r", encoding="utf-8") as f_in:
            for i, line in enumerate(f_in):
                if args.max_examples and i >= args.max_examples:
                    break
                try:
                    records.append(json.loads(line))
                except json.JSONDecodeError:
                    # Blank lines are expected noise; anything else is reported
                    # below so silent data loss is visible.
                    if line.strip():
                        malformed += 1
        if malformed:
            print(f"[WARN] Skipped {malformed} malformed line(s) in {dataset_name}")

        slice_start = args.start
        slice_end = args.end if args.end is not None else len(records)
        records = records[slice_start:slice_end]
        print(f"[INFO] Processing slice {slice_start}-{slice_end-1} (n={len(records)})")

        out_path = os.path.join(args.output_dir, f"tweet_results_{dataset_name}_{slice_start}_{slice_end-1}.json")

        all_results = []
        all_indices = list(range(len(records)))

        for idx, rec in enumerate(tqdm(records, desc=dataset_name)):
            prompt_text_raw = _record_text(rec, "prompt")
            prompt_text = _extract_tweet_content(prompt_text_raw)
            gt_label = _ground_truth_label(_record_text(rec, "response"))

            # Few-shot examples: any other record in the slice, never the
            # target itself.
            # NOTE(review): the persona prompts state that 3 high and 3 low
            # examples are shown, but this draw is not stratified by label.
            pool = [i for i in all_indices if i != idx]
            neighbor_ids = random.sample(pool, k=min(6, len(pool)))

            example_blocks = []
            for sid in neighbor_ids:
                ex = records[sid]
                # Read examples with the same tolerant accessor as the target
                # so a record missing a field cannot raise KeyError here.
                text = _extract_tweet_content(_record_text(ex, "prompt"))
                label = _ground_truth_label(_record_text(ex, "response"))
                example_blocks.append(f"Example Tweet ({label} likes):\n{text}")
            examples_text = "\n---\n".join(example_blocks)

            # Identical for every persona, so build it once per record.
            user_prompt = f"New Tweet:\n{prompt_text}\n\n{examples_text}"

            persona_outputs = {}
            persona_labels = []
            for persona_name, sys_prompt in persona_prompts.items():
                resp_text = verbalize(user_prompt, sys_prompt)
                label = _parse_answer_label(resp_text)
                persona_outputs[persona_name] = {"response": resp_text, "label": label}
                # Personas without a parsable answer do not vote.
                if label:
                    persona_labels.append(1 if label == "high" else 0)

            high_count = sum(persona_labels)
            low_count = len(persona_labels) - high_count
            pred_label = "high" if high_count > low_count else "low"

            brand, year = _extract_brand_and_date(prompt_text_raw)

            all_results.append({
                "prompt": prompt_text,
                "ground_truth": gt_label,
                "persona_predictions": persona_outputs,
                "predicted_label": pred_label,
                "brand": brand,
                "year": year
            })

            # Checkpoint after every record so an interrupted run keeps its
            # progress; the atomic replace keeps the file valid JSON even if
            # the process dies mid-write.
            _write_json_atomic(out_path, all_results)

def main() -> None:
    """Script entry point: parse flags, create the shared client, run the evaluation."""
    global client
    cli_args = parse_args()
    # The environment variable takes precedence; the config_dict value is
    # used only when OPENAI_API_KEY is not set.
    resolved_key = os.getenv("OPENAI_API_KEY", config_dict["api_key"])
    client = AzureOpenAI(
        api_key=resolved_key,
        api_version=config_dict["api_version"],
        azure_endpoint=config_dict["azure_endpoint"],
    )
    run_tweet_evaluation(cli_args)


if __name__ == "__main__":
    main()


            

            

        
        
        

        

